# -*- coding: utf-8 -*-
import re
import csv
import codecs
import time

import requests
from config.settings import DLTSettings

# Script body: download the lottery index page, collect every issue number
# linked from it, fetch each issue's result page, and write one CSV row per
# issue (issue number followed by the drawn two-digit numbers) to
# DLTSettings.DATASET_PATH.

# Link to a single issue's result page; group 1 captures the issue number.
# Raw string so that `\d` is a regex class rather than an invalid string
# escape; literal dots are escaped so they no longer match any character.
pattern = r'<a href="https://kaijiang\.500\.com/shtml/dlt/(\d+?)\.shtml">'

# Patterns for the per-issue page, compiled once instead of on every iteration.
# First the container holding the drawn balls, then each two-digit ball in it.
ball_box_re = re.compile(r'<div class="ball_box01">(.*?)</div>', flags=re.I | re.S)
ball_re = re.compile(r'<li class=".*?">(\d{2})</li>', flags=re.I | re.S)

# Seconds to wait on each HTTP request; without a timeout `requests` can
# block forever on a stalled connection.
REQUEST_TIMEOUT = 10

print('开始尝试从 {} 获取最新的大乐透数据...'.format(DLTSettings.LOTTO_DOWNLOAD_URL))
try:
    resp = requests.get(DLTSettings.LOTTO_DOWNLOAD_URL, timeout=REQUEST_TIMEOUT)
    # Check the status before touching the body so an error page cannot
    # surface as an unrelated decoding error.
    if resp.status_code != 200:
        raise Exception('获取数据失败！')

    # GBK covers the GB2312 repertoire; undecodable bytes are replaced rather
    # than raised because only ASCII markup and digits are extracted below.
    content = resp.content.decode('gbk', errors='replace')

    # Template for the per-issue page: 'number' is swapped for the issue number.
    url = 'https://kaijiang.500.com/shtml/dlt/number.shtml'
    dlt_list = re.findall(pattern, content, flags=re.I | re.S)
    print(dlt_list)
    print(len(dlt_list))

    # Bail out before opening the dataset in 'w' mode: with no issues found,
    # opening it would only truncate the existing file to nothing.
    if not dlt_list:
        raise Exception('获取数据失败！')

    # `with` guarantees the file is closed even if a request or parse fails
    # part-way through the loop.
    with codecs.open(DLTSettings.DATASET_PATH, 'w', 'gbk') as f:
        writer = csv.writer(f)
        for index in dlt_list:
            res = requests.get(url.replace('number', str(index)),
                               timeout=REQUEST_TIMEOUT)
            if res.status_code != 200:
                # Skip this issue instead of parsing an error page.
                print('第{}期数据获取失败，已跳过！'.format(index))
                continue

            content1 = res.content.decode('gbk', errors='replace')
            number_dom = ball_box_re.findall(content1)
            print(number_dom)
            if not number_dom:
                # No ball container on the page: indexing [0] would raise
                # IndexError and abort every remaining issue.
                print('第{}期数据解析失败，已跳过！'.format(index))
                continue

            number_list = ball_re.findall(number_dom[0])
            print(number_list)
            line = [index] + number_list
            writer.writerow(line)
            # NOTE(review): this message is emitted once per issue, not once
            # at the end; kept as-is to preserve the script's output.
            print('完成！当前最新期数为{}期，请确认期数是否正确！'.format(index))
            # Small pause between requests to avoid hammering the server.
            time.sleep(0.1)
except Exception as e:
    # Top-level boundary of the script: report the failure and exit normally.
    print(e)
